Pull Data

# Load and process ILINet data
# Load ILINet surveillance data and normalize it for modeling.
# Steps: rename columns, convert MMWR year/week to a week-ending date,
# transform the ILI signal per location, and harmonize location names.
# (Pipe operator unified to %>% — the original chain mixed |> and %>%.)
df.ilinet <- download_ilinet_data() %>%
  mutate(location_name = region, 
         ili = unweighted_ili, 
         # MMWRday = 7 gives the Saturday (week-ending) date of each MMWR week
         date = MMWRweek2Date(MMWRyear = year, MMWRweek = week, MMWRday = 7)) %>%
  dplyr::select(date, location_name, ili) %>%
  drop_na() %>%
  group_by(location_name) %>%
  # +1 before transforming guards against zeros; applied independently per location
  mutate(ili = transformed(ili + 1)) %>%
  ungroup() %>%
  # transformed() may introduce NAs, so drop missing rows again afterwards
  drop_na() %>%
  # Virgin Islands is excluded throughout this analysis
  filter(!(location_name %in% c('Virgin Islands'))) %>%
  # Hospitalization data labels the national series 'US'; match that here
  mutate(location_name = ifelse(location_name == 'National', 'US', location_name))

# Persist the processed ILINet series for downstream use
write_csv(df.ilinet, file = file_names$ili_output)

# Load and process hospitalization data
# Read FluSurv-NET hospitalization rates, keeping the overall
# (all-age, all-sex, all-race) weekly rate per catchment through the
# end of the 2018-19 season.
df.hosp <- read_csv(file_names$hospitalization) %>%
  filter(`AGE CATEGORY` == 'Overall',
         `SEX CATEGORY` == 'Overall',
         `RACE CATEGORY` == 'Overall') %>%
  mutate(
    # 'Entire Network' is the national catchment; relabel it 'US'
    location_name = ifelse(CATCHMENT == 'Entire Network', 'US', CATCHMENT),
    # The source encodes missing rates as the string 'null'; treat them as 0
    `WEEKLY RATE` = as.numeric(ifelse(`WEEKLY RATE` == 'null', 0, `WEEKLY RATE`)),
    # Log scale (+1 keeps zero rates finite)
    hosp = log(`WEEKLY RATE` + 1),
    date = MMWRweek2Date(MMWRyear = `MMWR-YEAR`, MMWRweek = `MMWR-WEEK`, MMWRday = 7)
  ) %>%
  filter(date <= mdy('06/30/2019')) %>%
  dplyr::select(date, location_name, hosp) %>%
  filter(!(location_name %in% c('Virgin Islands')))
## Rows: 124800 Columns: 12
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): CATCHMENT, NETWORK, YEAR, AGE CATEGORY, SEX CATEGORY, RACE CATEGOR...
## dbl  (2): MMWR-YEAR, MMWR-WEEK
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Merge Data

# Combine ILINet and hospitalization data
# Join the hospitalization and ILINet series on week and location.
# full_join keeps weeks present in only one of the two data sets.
df.combined <- full_join(df.hosp, df.ilinet,
                         by = c('date', 'location_name')) %>%
  # Defensive re-filter; both inputs already exclude the Virgin Islands
  filter(!(location_name %in% c('Virgin Islands')))

Plot Example

# Prepare data for plotting
# Long-format table (date, location_name, name, value) for ggplot,
# restricted to weeks where both series are observed.
df.plot <- df.combined %>%
  drop_na() %>%
  pivot_longer(cols = -c(date, location_name),
               names_to = "name",
               values_to = "value")

# Create time series plot
# Time series of both signals, one free-y panel per location
p1 <- ggplot(df.plot, aes(x = date, y = value, color = name)) +
  geom_line(alpha = 0.6) +
  facet_wrap(~location_name, ncol = 5, scales = 'free_y') +
  scale_color_manual(name = NULL, values = c(hosp = 'black', ili = 'red')) +
  theme_bw() +
  theme(legend.position = 'top',
        legend.justification = 'right',
        legend.text = element_text(size = 9),
        legend.box.spacing = unit(0, "pt"),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

show(p1)

# Create scatter plot of ILI vs hospitalizations
# Scatter of ILI vs. log-hospitalizations, colored by location
p2 <- df.plot %>%
  pivot_wider(names_from = name, values_from = value) %>%
  drop_na() %>%
  ggplot(aes(ili, hosp, color = location_name, fill = location_name)) +
  geom_point(alpha = 0.6) +
  scale_color_discrete(name = NULL) +
  scale_fill_discrete(name = NULL) +
  xlab('Normalized Unweighted ILI percentage') +
  ylab('Log(Hospitalizations + 1)') +
  theme_bw(18) +
  theme(legend.position = "top")

show(p2)

# Create faceted scatter plot
# Same scatter, faceted by location instead of colored
p3 <- df.plot %>%
  pivot_wider(names_from = name, values_from = value) %>%
  drop_na() %>%
  ggplot(aes(ili, hosp)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~location_name, ncol = 5) +
  xlab('Normalized Unweighted ILI percentage') +
  ylab('Log(Hospitalizations + 1)') +
  theme_bw()

show(p3)

#ggsave(plot = p3, 'model_scatter.png', width = 9, height = 3.5, dpi = 300)
#ggsave(plot = p3, 'model_scatter.pdf', width = 9, height = 3.5)

Make Regression Models

# Create linear regression models for each location
# Fit one simple linear regression (hosp ~ ili) per location and collect
# coefficients and fit statistics. Replaces the superseded dplyr::do()
# with group_modify(), unifies the pipe operator, and ungroups the result.
# data.frame() name repair yields the p.value / R.2 column names shown in
# the printed table, as before.
df.models <- df.combined %>%
  drop_na() %>%
  rename(location = location_name) %>%
  group_by(location) %>%
  group_modify(~ {
    model <- lm(hosp ~ ili, data = .x)
    fit <- summary(model)
    data.frame(intercept = round(coef(model)[1], digits = 2),
               slope = round(coef(model)[2], digits = 2),
               `p-value` = fit$coefficients[2, 4],
               `R^2` = round(fit$r.squared, digits = 2))
  }) %>%
  ungroup()

# Display model results in a table
# Render the per-location model summaries as a striped table
t1 <- kbl(df.models) %>%
  kable_paper('striped',
              full_width = FALSE,
              html_font = 'sans-serif')

# Export the table; PDF rendering works best with the magick package installed
save_kable(t1, file = "fit_summary.pdf")
t1
location intercept slope p.value R.2
California 0.38 0.59 0 0.41
Colorado 0.70 0.61 0 0.53
Connecticut 0.75 0.71 0 0.59
Georgia 0.43 0.46 0 0.60
Maryland 0.67 0.54 0 0.43
Minnesota 0.32 0.67 0 0.61
New Mexico 0.51 0.63 0 0.54
Oregon 0.45 0.59 0 0.57
Tennessee 0.52 0.62 0 0.63
US 0.16 0.83 0 0.77

Combined Model

# Create a combined linear regression model
# Pool all locations into one regression of log-hospitalizations on ILI
df.final <- df.plot %>%
  pivot_wider(names_from = name, values_from = value) %>%
  drop_na()

mod <- lm(hosp ~ ili, data = df.final)

summary(mod)
## 
## Call:
## lm(formula = hosp ~ ili, data = df.final)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.29912 -0.33575 -0.03279  0.29163  1.75523 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.509009   0.009457   53.82   <2e-16 ***
## ili         0.588127   0.010897   53.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4612 on 2738 degrees of freedom
## Multiple R-squared:  0.5155, Adjusted R-squared:  0.5153 
## F-statistic:  2913 on 1 and 2738 DF,  p-value: < 2.2e-16

Output Predicted Hospitalizations for Plotting

# Prepare ILINet data for prediction
# Re-download ILINet and apply the same normalization used for df.ilinet,
# but keep the original location names (no 'National' -> 'US' relabel here).
tmp.ilinet <- download_ilinet_data() %>%
  transmute(date = MMWRweek2Date(MMWRyear = year, MMWRweek = week, MMWRday = 7),
            location_name = region,
            ili = unweighted_ili) %>%
  filter(!(location_name %in% c('Virgin Islands'))) %>%
  drop_na() %>%
  group_by(location_name) %>%
  mutate(ili = transformed(ili + 1)) %>%
  ungroup() %>%
  drop_na()

# Back-transform model predictions from log space (exp(pred) - 1) and
# floor at zero, since negative admissions are impossible.
tmp.predicted.hosp <- tmp.ilinet %>%
  bind_cols(pred_hosp = round(exp(predict(mod, tmp.ilinet)) - 1, digits = 2)) %>%
  mutate(pred_hosp = ifelse(pred_hosp < 0, 0, pred_hosp))

#write_csv(tmp.predicted.hosp, file = file_names$output_predicted)

Make Historical Time Series

# Forward-fill gaps within each location's weekly ILI series.
# pivot_wider aligns all locations on a shared date axis so fill() can
# carry the last observed value down through missing weeks, then the
# table is returned to long format, ordered by location and date.
df.ilinet <- df.ilinet %>%
  pivot_wider(names_from = location_name, values_from = ili) %>%
  fill(everything(), .direction = "down") %>%
  pivot_longer(cols = -date,
               names_to = "location_name",
               values_to = "ili") %>%
  arrange(location_name, date)

# Back-transform model predictions to the rate scale and floor at zero
predicted.hosp <- df.ilinet %>%
  bind_cols(pred_hosp = round(exp(predict(mod, df.ilinet)) - 1, digits = 2)) %>%
  mutate(pred_hosp = ifelse(pred_hosp < 0, 0, pred_hosp))

# Create historical time series plot
# Historical time series of predicted hospitalization rates per location.
# facet_wrap's argument is `scales`; the original `scale =` only worked
# through partial argument matching.
p4 <- ggplot(predicted.hosp, aes(date, pred_hosp)) + 
  geom_line() +
  facet_wrap(~location_name, ncol = 3, scales = 'free_y') +
  theme_bw()

show(p4)
## Warning: Removed 679 rows containing missing values or values outside the scale
## range (`geom_line()`).

Pull fractional influenza in ED visits

## Fetching batch: 1 Offset: 0 
## Rows retrieved: 3300 
## Fetching batch: 2 Offset: 3300 
## Rows retrieved: 3300 
## Fetching batch: 3 Offset: 6600 
## Rows retrieved: 3300 
## Fetching batch: 4 Offset: 9900 
## Rows retrieved: 3300 
## Fetching batch: 5 Offset: 13200 
## Rows retrieved: 3300 
## Fetching batch: 6 Offset: 16500 
## Rows retrieved: 3300 
## Fetching batch: 7 Offset: 19800 
## Rows retrieved: 3300 
## Fetching batch: 8 Offset: 23100 
## Rows retrieved: 3300 
## Fetching batch: 9 Offset: 26400 
## Rows retrieved: 3300 
## Fetching batch: 10 Offset: 29700 
## Rows retrieved: 3300 
## Fetching batch: 11 Offset: 33000 
## Rows retrieved: 3300 
## Fetching batch: 12 Offset: 36300 
## Rows retrieved: 3300 
## Fetching batch: 13 Offset: 39600 
## Rows retrieved: 3300 
## Fetching batch: 14 Offset: 42900 
## Rows retrieved: 3300 
## Fetching batch: 15 Offset: 46200 
## Rows retrieved: 3300 
## Fetching batch: 16 Offset: 49500 
## Rows retrieved: 3300 
## Fetching batch: 17 Offset: 52800 
## Rows retrieved: 3300 
## Fetching batch: 18 Offset: 56100 
## Rows retrieved: 3300 
## Fetching batch: 19 Offset: 59400 
## Rows retrieved: 3300 
## Fetching batch: 20 Offset: 62700 
## Rows retrieved: 3300 
## Fetching batch: 21 Offset: 66000 
## Rows retrieved: 3300 
## Fetching batch: 22 Offset: 69300 
## Rows retrieved: 3300 
## Fetching batch: 23 Offset: 72600 
## Rows retrieved: 3300 
## Fetching batch: 24 Offset: 75900 
## Rows retrieved: 3300 
## Fetching batch: 25 Offset: 79200 
## Rows retrieved: 3300 
## Fetching batch: 26 Offset: 82500 
## Rows retrieved: 3300 
## Fetching batch: 27 Offset: 85800 
## Rows retrieved: 3300 
## Fetching batch: 28 Offset: 89100 
## Rows retrieved: 3300 
## Fetching batch: 29 Offset: 92400 
## Rows retrieved: 3300 
## Fetching batch: 30 Offset: 95700 
## Rows retrieved: 3300 
## Fetching batch: 31 Offset: 99000 
## Rows retrieved: 3300 
## Fetching batch: 32 Offset: 102300 
## Rows retrieved: 3300 
## Fetching batch: 33 Offset: 105600 
## Rows retrieved: 3300 
## Fetching batch: 34 Offset: 108900 
## Rows retrieved: 3300 
## Fetching batch: 35 Offset: 112200 
## Rows retrieved: 3300 
## Fetching batch: 36 Offset: 115500 
## Rows retrieved: 3300 
## Fetching batch: 37 Offset: 118800 
## Rows retrieved: 3300 
## Fetching batch: 38 Offset: 122100 
## Rows retrieved: 3300 
## Fetching batch: 39 Offset: 125400 
## Rows retrieved: 3300 
## Fetching batch: 40 Offset: 128700 
## Rows retrieved: 3300 
## Fetching batch: 41 Offset: 132000 
## Rows retrieved: 3300 
## Fetching batch: 42 Offset: 135300 
## Rows retrieved: 3300 
## Fetching batch: 43 Offset: 138600 
## Rows retrieved: 3300 
## Fetching batch: 44 Offset: 141900 
## Rows retrieved: 3300 
## Fetching batch: 45 Offset: 145200 
## Rows retrieved: 3300 
## Fetching batch: 46 Offset: 148500 
## Rows retrieved: 3300 
## Fetching batch: 47 Offset: 151800 
## Rows retrieved: 3300 
## Fetching batch: 48 Offset: 155100 
## Rows retrieved: 3300 
## Fetching batch: 49 Offset: 158400 
## Rows retrieved: 3300 
## Fetching batch: 50 Offset: 161700 
## Rows retrieved: 3300 
## Fetching batch: 51 Offset: 165000 
## Rows retrieved: 3300 
## Fetching batch: 52 Offset: 168300 
## Rows retrieved: 3300 
## Fetching batch: 53 Offset: 171600 
## Rows retrieved: 3300 
## Fetching batch: 54 Offset: 174900 
## Rows retrieved: 3300 
## Fetching batch: 55 Offset: 178200 
## Rows retrieved: 3300 
## Fetching batch: 56 Offset: 181500 
## Rows retrieved: 3300 
## Fetching batch: 57 Offset: 184800 
## Rows retrieved: 3300 
## Fetching batch: 58 Offset: 188100 
## Rows retrieved: 3300 
## Fetching batch: 59 Offset: 191400 
## Rows retrieved: 3300 
## Fetching batch: 60 Offset: 194700 
## Rows retrieved: 3300 
## Fetching batch: 61 Offset: 198000 
## Rows retrieved: 3300 
## Fetching batch: 62 Offset: 201300 
## Rows retrieved: 3300 
## Fetching batch: 63 Offset: 204600 
## Rows retrieved: 3300 
## Fetching batch: 64 Offset: 207900 
## Rows retrieved: 3300 
## Fetching batch: 65 Offset: 211200 
## Rows retrieved: 3300 
## Fetching batch: 66 Offset: 214500 
## Rows retrieved: 3300 
## Fetching batch: 67 Offset: 217800 
## Rows retrieved: 3300 
## Fetching batch: 68 Offset: 221100 
## Rows retrieved: 3300 
## Fetching batch: 69 Offset: 224400 
## Rows retrieved: 3300 
## Fetching batch: 70 Offset: 227700 
## Rows retrieved: 3300 
## Fetching batch: 71 Offset: 231000 
## Rows retrieved: 3300 
## Fetching batch: 72 Offset: 234300 
## Rows retrieved: 3300 
## Fetching batch: 73 Offset: 237600 
## Rows retrieved: 3300 
## Fetching batch: 74 Offset: 240900 
## Rows retrieved: 3300 
## Fetching batch: 75 Offset: 244200 
## Rows retrieved: 3300 
## Fetching batch: 76 Offset: 247500 
## Rows retrieved: 3300 
## Fetching batch: 77 Offset: 250800 
## Rows retrieved: 3300 
## Fetching batch: 78 Offset: 254100 
## Rows retrieved: 3300 
## Fetching batch: 79 Offset: 257400 
## Rows retrieved: 3300 
## Fetching batch: 80 Offset: 260700 
## Rows retrieved: 3300 
## Fetching batch: 81 Offset: 264000 
## Rows retrieved: 3300 
## Fetching batch: 82 Offset: 267300 
## Rows retrieved: 3300 
## Fetching batch: 83 Offset: 270600 
## Rows retrieved: 3300 
## Fetching batch: 84 Offset: 273900 
## Rows retrieved: 3300 
## Fetching batch: 85 Offset: 277200 
## Rows retrieved: 3300 
## Fetching batch: 86 Offset: 280500 
## Rows retrieved: 3300 
## Fetching batch: 87 Offset: 283800 
## Rows retrieved: 3300 
## Fetching batch: 88 Offset: 287100 
## Rows retrieved: 3300 
## Fetching batch: 89 Offset: 290400 
## Rows retrieved: 3300 
## Fetching batch: 90 Offset: 293700 
## Rows retrieved: 3300 
## Fetching batch: 91 Offset: 297000 
## Rows retrieved: 3300 
## Fetching batch: 92 Offset: 300300 
## Rows retrieved: 3300 
## Fetching batch: 93 Offset: 303600 
## Rows retrieved: 3300 
## Fetching batch: 94 Offset: 306900 
## Rows retrieved: 3300 
## Fetching batch: 95 Offset: 310200 
## Rows retrieved: 3300 
## Fetching batch: 96 Offset: 313500 
## Rows retrieved: 3300 
## Fetching batch: 97 Offset: 316800 
## Rows retrieved: 3300 
## Fetching batch: 98 Offset: 320100 
## Rows retrieved: 3300 
## Fetching batch: 99 Offset: 323400 
## Rows retrieved: 3300 
## Fetching batch: 100 Offset: 326700 
## Rows retrieved: 3300 
## Fetching batch: 101 Offset: 330000 
## Rows retrieved: 3300 
## Fetching batch: 102 Offset: 333300 
## Rows retrieved: 3300 
## Fetching batch: 103 Offset: 336600 
## Rows retrieved: 3300 
## Fetching batch: 104 Offset: 339900 
## Rows retrieved: 3300 
## Fetching batch: 105 Offset: 343200 
## Rows retrieved: 3300 
## Fetching batch: 106 Offset: 346500 
## Rows retrieved: 3300 
## Fetching batch: 107 Offset: 349800 
## Rows retrieved: 3300 
## Fetching batch: 108 Offset: 353100 
## Rows retrieved: 3300 
## Fetching batch: 109 Offset: 356400 
## Rows retrieved: 3300 
## Fetching batch: 110 Offset: 359700 
## Rows retrieved: 3300 
## Fetching batch: 111 Offset: 363000 
## Rows retrieved: 3300 
## Fetching batch: 112 Offset: 366300 
## Rows retrieved: 3300 
## Fetching batch: 113 Offset: 369600 
## Rows retrieved: 3300 
## Fetching batch: 114 Offset: 372900 
## Rows retrieved: 3300 
## Fetching batch: 115 Offset: 376200 
## Rows retrieved: 3300 
## Fetching batch: 116 Offset: 379500 
## Rows retrieved: 3300 
## Fetching batch: 117 Offset: 382800 
## Rows retrieved: 3300 
## Fetching batch: 118 Offset: 386100 
## Rows retrieved: 3300 
## Fetching batch: 119 Offset: 389400 
## Rows retrieved: 3300 
## Fetching batch: 120 Offset: 392700 
## Rows retrieved: 3300 
## Fetching batch: 121 Offset: 396000 
## Rows retrieved: 3300 
## Fetching batch: 122 Offset: 399300 
## Rows retrieved: 3300 
## Fetching batch: 123 Offset: 402600 
## Rows retrieved: 3300 
## Fetching batch: 124 Offset: 405900 
## Rows retrieved: 3300 
## Fetching batch: 125 Offset: 409200 
## Rows retrieved: 3300 
## Fetching batch: 126 Offset: 412500 
## Rows retrieved: 3300 
## Fetching batch: 127 Offset: 415800 
## Rows retrieved: 3300 
## Fetching batch: 128 Offset: 419100 
## Rows retrieved: 3300 
## Fetching batch: 129 Offset: 422400 
## Rows retrieved: 3300 
## Fetching batch: 130 Offset: 425700 
## Rows retrieved: 3300 
## Fetching batch: 131 Offset: 429000 
## Rows retrieved: 3300 
## Fetching batch: 132 Offset: 432300 
## Rows retrieved: 3300 
## Fetching batch: 133 Offset: 435600 
## Rows retrieved: 3300 
## Fetching batch: 134 Offset: 438900 
## Rows retrieved: 3300 
## Fetching batch: 135 Offset: 442200 
## Rows retrieved: 3300 
## Fetching batch: 136 Offset: 445500 
## Rows retrieved: 3300 
## Fetching batch: 137 Offset: 448800 
## Rows retrieved: 3300 
## Fetching batch: 138 Offset: 452100 
## Rows retrieved: 3300 
## Fetching batch: 139 Offset: 455400 
## Rows retrieved: 3300 
## Fetching batch: 140 Offset: 458700 
## Rows retrieved: 3300 
## Fetching batch: 141 Offset: 462000 
## Rows retrieved: 3300 
## Fetching batch: 142 Offset: 465300 
## Rows retrieved: 3300 
## Fetching batch: 143 Offset: 468600 
## Rows retrieved: 3300 
## Fetching batch: 144 Offset: 471900 
## Rows retrieved: 1958 
## Total batches: 144 Total rows: 473858
## [1] "Total rows retrieved: 473858"

Pull new NHSN data

#' Fetch current NHSN influenza hospitalization data from data.cdc.gov.
#'
#' Downloads weekly jurisdiction-level confirmed-flu admissions (dataset
#' mpgq-jmmr), restricts to the mandatory-reporting era (from 2024-05-01),
#' drops territories not included in FluSight, linearly interpolates
#' internal missing values within each location, and attaches population
#' to compute a weekly rate per 100,000.
#'
#' Relies on globals `cdc.locations.file` (abbreviation -> location lookup)
#' and `population` (location_name -> population) defined elsewhere in
#' this script. Local assignments use `<-` (the original used `=`).
#'
#' @return A data frame with columns date, location, location_name,
#'   value (weekly admissions), and weekly_rate (admissions per 100k).
fetch_flu <- function() {
  
  # Read from data.cdc.gov, filtering for when flu reporting became mandatory
  health_data <- RSocrata::read.socrata(url = "https://data.cdc.gov/resource/mpgq-jmmr.json") %>% 
    dplyr::filter(weekendingdate >= as.Date("2024-05-01"))
  
  # Remove VI, AS, GU, MP (not included in FluSight); keep and rename
  # only the variables needed downstream
  recent_data <- health_data %>% 
    dplyr::filter(!jurisdiction %in% c("VI", "AS", "GU", "MP")) %>% 
    dplyr::select(jurisdiction, weekendingdate, totalconfflunewadm) %>% 
    dplyr::rename("value" = "totalconfflunewadm", "date" = "weekendingdate", "state" = "jurisdiction") %>% 
    dplyr::mutate(date = as.Date(date), 
                  value = as.numeric(value),
                  # National rows are coded 'USA' upstream; this file uses 'US'
                  state = str_replace(state, "USA", "US"))
  
  # Attach location metadata via the state abbreviation
  full_data <- dplyr::left_join(recent_data, cdc.locations.file, by = join_by("state" == "abbreviation"))

  # Drop rows with no known location, then linearly interpolate internal
  # NA values within each location's series (na.rm = FALSE: no extrapolation)
  full_data <- full_data %>%
    dplyr::filter(!is.na(location_name)) %>%
    dplyr::arrange(location_name, date) %>%
    dplyr::group_by(location_name) %>%
    dplyr::mutate(value = zoo::na.approx(value, x = as.numeric(date), na.rm = FALSE)) %>%
    dplyr::ungroup()
  
  # Compute the weekly admission rate per 100,000 population
  final_dat <- full_data %>% 
    left_join(population, by = c("location_name")) %>%
    dplyr::mutate(weekly_rate = (value * 100000) / population) %>% 
    select(date, location, location_name, value, weekly_rate)
  
  final_dat
}

# Pull the current NHSN series, keeping only the columns needed for
# stitching, with admissions renamed total_hosp
most_recent_flu <- fetch_flu() %>%
  transmute(date, location_name, total_hosp = value)
## Warning in RSocrata::read.socrata(url =
## "https://data.cdc.gov/resource/mpgq-jmmr.json"): Dates and currency fields will
## be converted to character

Compare To Current Data

# Spline-interpolate internal gaps in y (indexed by x) WITHOUT extrapolating
# past the first/last observed value. Returns a vector the same length as y
# with internal NAs filled and leading/trailing NAs left as NA.
interpolate_internal <- function(y, x) {
  # na.spline fills every NA, including by extrapolation beyond the
  # observed range -- we keep only the internal part of that fill.
  filled <- na.spline(y, x = x)

  # na.approx with na.rm = FALSE fills internal gaps only, leaving
  # leading/trailing NAs in place; use it to locate the positions that
  # na.spline could only reach by extrapolating.
  extrapolated <- is.na(na.approx(y, x = x, na.rm = FALSE))

  # Blank out the extrapolated ends
  filled[extrapolated] <- NA

  filled
}

# Load and process historical hospitalization data
# Ground-truth hospitalizations for the 2021-22 season
# (window: 2021-07-01 inclusive through 2022-06-01 exclusive)
df.old.true.hosp <- read_csv(file_names$old_true_hosp_1) %>%
  filter(date >= mdy('07/01/2021'),
         date < mdy('06/01/2022')) %>%
  mutate(true_hosp = value) %>%
  select(-value, -location)
## Rows: 10010 Columns: 4
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): location, location_name
## dbl  (1): value
## date (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load and process current hospitalization data
# Ground truth from June 2022 onward, stacked with the 2021-22 season above.
# The join keys are now explicit (previously an implicit natural join, as
# logged: join_by(date, location_name, true_hosp)). Because the two date
# ranges are disjoint, this full join behaves as a row-bind.
df.true.hosp <- read_csv(file_names$old_true_hosp_2) %>%
  filter(date >= mdy('06/01/2022')) %>%
  mutate(true_hosp = value) %>%
  select(-'...1', -value, -location, -weekly_rate) %>%
  full_join(df.old.true.hosp, by = join_by(date, location_name, true_hosp))
## New names:
## Rows: 6148 Columns: 6
## ── Column specification
## ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── Delimiter: "," chr
## (2): location, location_name dbl (3): ...1, value, weekly_rate date (1): date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(date, location_name, true_hosp)`
## • `` -> `...1`
# Combine predicted and true hospitalization data
# Stitch one hospitalization time series per location from four sources:
#   1. pre-2019 model predictions, shifted forward 728 days (104 weeks)
#      to align historical seasons onto the 2021-22 calendar;
#   2. observed (true) hospitalizations where available;
#   3. model predictions beyond the end of the observed record;
#   4. the mandatory-reporting NHSN series from May 2024 onward.
# (Pipe operator unified to %>% — the original chain mixed |> and %>%.)
df.combined.hosp <- predicted.hosp %>% 
  filter(date <= mdy('06/30/2019')) %>%
  # Shift forward exactly 104 weeks so week-ending days stay aligned
  mutate(date = date + days(728)) %>%
  full_join(df.true.hosp, by = c('date', 'location_name')) %>%
  # Append predictions past the end of the observed data
  full_join(predicted.hosp %>% filter(date > max(df.true.hosp$date))) %>%
  full_join(df.ed, by = c('date', 'location_name')) %>%
  # A missing scale factor means "no adjustment"
  mutate(scale_factor = ifelse(is.na(scale_factor), 1, scale_factor)) %>%
  inner_join(population, by = 'location_name') %>%
  # Convert predicted rate per 100k to counts; where no prediction exists,
  # fall back to the observed true counts (prediction takes precedence)
  mutate(prelim_hosp = round(coalesce(pred_hosp * population / 100000, true_hosp))) %>%
  mutate(total_hosp = round(prelim_hosp * scale_factor)) %>%
  # From May 2024 onward, use the mandatory-reporting NHSN data instead
  filter(date < mdy('05/01/2024')) %>%
  bind_rows(most_recent_flu) %>%
  arrange(location_name, date) %>%
  filter(location_name != 'Virgin Islands') %>%
  group_by(location_name) %>%
  # Regularize each location to a complete weekly date grid
  complete(date = seq(min(date), max(date), by = "week")) %>%
  # Spline-fill internal gaps only (no extrapolation), then floor at zero
  mutate(total_hosp = interpolate_internal(total_hosp, as.numeric(date))) %>%
  mutate(total_hosp = ifelse(total_hosp < 0, 0, total_hosp)) %>%
  ungroup()
## Joining with `by = join_by(date, location_name, ili, pred_hosp)`
# Sanity check for date differences
# Sanity check: after complete(), consecutive dates within each location
# should differ by exactly 7 days (plus one NA per group for the first row)
sanity <- df.combined.hosp %>% 
  arrange(location_name, date) %>% 
  group_by(location_name) %>% 
  mutate(diffs = difftime(date, lag(date, 1)))

unique(sanity$diffs)
## [1] NA  7
# Create plot of combined hospitalization data
# Full stitched series, points over lines, one free-y panel per location
p5 <- ggplot(df.combined.hosp, aes(x = date, y = total_hosp)) +
  geom_line(alpha = 0.5) +
  geom_point(size = 1.1, alpha = 0.3) +
  facet_wrap(~location_name, ncol = 3, scales = 'free_y') +
  theme_bw()

show(p5)
## Warning: Removed 35464 rows containing missing values or values outside the
## scale range (`geom_point()`).
## Warning: Removed 679 rows containing missing values or values outside the scale
## range (`geom_line()`).

Output Stitched Time Series

# Save the stitched hospitalization series. The alternate destination
# (output_final) is intentionally disabled; only output_final_save is written.
#write_csv(df.combined.hosp, file_names$output_final)
write_csv(df.combined.hosp, file_names$output_final_save)

Reload Stitched Series and Plot Recent Data

df.combined.hosp <- read_csv(file_names$output_final_save)
## Rows: 71815 Columns: 9
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): location_name
## dbl  (7): ili, pred_hosp, true_hosp, scale_factor, population, prelim_hosp, ...
## date (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Create plot of recent hospitalization data
# Recent portion of the stitched series (from June 2023), per location
p6 <- df.combined.hosp %>%
  filter(date >= mdy('06/01/2023')) %>%
  ggplot(aes(date, total_hosp)) +
  geom_point(size = 1.1, alpha = 0.3) +
  geom_line(alpha = 0.5) +
  facet_wrap(~location_name, ncol = 3, scales = 'free_y') +
  theme_bw()

show(p6)